Last modified 07:38 PM on May 17, 2016. This document, R session image, version history, knitr cache, figures, and other associated datasets are located in /inside/grotto/blin/trna-markers/luad/predict/.
library(GenomicRanges)
library(RColorBrewer)
library(pheatmap)
library(plyr)
library(Hmisc)
load('/inside/home/blin/grotto/trna-markers/process-reads/luad-counts.RData')
load('/inside/home/blin/grotto/trna-markers/process-reads/luad-metadata.RData')
load('/inside/grotto/blin/data/hg19-srnas.RData')
excludeExtraneousGenes = function(counts, threshold = 0.96) {
cor = cor(t(counts))
cor[lower.tri(cor)] = NA # remove duplicates (x-y and y-x)
cor_genes = data.frame(cbind(which(!is.na(cor), arr.ind = TRUE), na.omit(as.vector(cor))))
cor_genes$row = rownames(cor)[cor_genes$row]
cor_genes$col = colnames(cor)[cor_genes$col]
cor_genes = cor_genes[cor_genes$V3 != 1 & cor_genes$V3 > threshold, ]
if (nrow(cor_genes) == 0) return(rownames(cor))
extraneous_genes = c()
ok_genes = c()
for (i in 1:nrow(cor_genes)) {
gene1 = cor_genes$row[i]
gene2 = cor_genes$col[i]
if (!(gene1 %in% extraneous_genes) & !(gene1 %in% ok_genes)) {
if (!(gene2 %in% extraneous_genes) & !(gene2 %in% ok_genes)) {
ok_genes = c(ok_genes, gene1)
extraneous_genes = c(extraneous_genes, gene2)
}
else {
extraneous_genes = c(extraneous_genes, gene1)
}
}
else {
extraneous_genes = c(extraneous_genes, gene2)
}
}
ok_genes
}
filterVariance = function(counts, var = 0.5) {
gene_var = sapply(data.frame(t(counts)), var)
names(gene_var) = rownames(counts)
counts[which(gene_var > quantile(gene_var, 0.5)), ]
}
Default metadata for all heatmaps
counts = luad_adjusted_counts[rownames(luad_adjusted_counts) %in% srnas[srnas$class != 'tRNA']$tx_name, ]
heatmap_metadata = luad_metadata[luad_metadata$barcode %in% colnames(counts), ]
# subtype
load('/inside/grotto/blin/trna-markers/luad/cluster-subtypes/clusters.RData')
clusters$subtype[!clusters$known] = "Unknown"
heatmap_metadata$subtype = as.factor(clusters$subtype[match(substr(heatmap_metadata$barcode, 1, 12), clusters$barcode)])
# stage
levels(heatmap_metadata$stage) = list(I = c("Stage I", "Stage IA", "Stage IB"), II = c("Stage II", "Stage IIA", "Stage IIB"), III = c("Stage III", "Stage IIIA", "Stage IIIB"), IV = "Stage IV", Unknown = '[Not Available]')
# n stage
levels(heatmap_metadata$n_stage) = list(N0 = "N0", N1 = "N1", N2 = "N2", N3 = "N3", Unknown = c("NX", "[Not Available]"))
# smoker
heatmap_metadata$smoker = as.factor(heatmap_metadata$smoker)
levels(heatmap_metadata$smoker) = list(Smoker = 1, Nonsmoker = 0)
# survival
heatmap_metadata$survival = as.factor(heatmap_metadata$days_survived > 1000)
levels(heatmap_metadata$survival) = list(Long = TRUE, Short = FALSE)
# heatmap annotation
levels(heatmap_metadata$sample_type) = list(TP = "TP", NT = "NT")
annot_col = data.frame(stage = heatmap_metadata$stage, subtype = heatmap_metadata$subtype, n_stage = heatmap_metadata$n_stage, sample_type = heatmap_metadata$sample_type, smoker = heatmap_metadata$smoker)
rownames(annot_col) = colnames(counts)
annot_row = data.frame(sRNA = as.factor(srnas[match(rownames(counts), srnas$tx_name), ]$class))
rownames(annot_row) = rownames(counts)
annot_colors = list(stage = setNames(c(brewer.pal(4, "Set1"), "grey"), levels(heatmap_metadata$stage)),
subtype = setNames(c(brewer.pal(3, "Set2"), "grey"), levels(heatmap_metadata$subtype)),
n_stage = setNames(c(brewer.pal(4, "Set3"), "grey"), levels(heatmap_metadata$n_stage)),
sample_type = setNames(c("#EF8A62", "#67A9CF"), levels(heatmap_metadata$sample_type)),
smoker = setNames(c("#8dd3c7", "#ffffb3", "grey"), levels(heatmap_metadata$smoker)),
survival = setNames(c("#1f78b4", "#984ea3", "grey"), levels(heatmap_metadata$survival)),
sRNA = setNames(c(brewer.pal(8, "Paired"), "grey"), levels(annot_row$sRNA)))
df = log1p(counts) df = df[rownames(df) %in% excludeExtraneousGenes(df), ] pheatmap(df, annotation_col = annot_col, annotation_row = annot_row, annotation_colors = annot_colors, fontsize = 8, show_colnames = FALSE, show_rownames = FALSE, trace = "none", color = brewer.pal(9, "Reds"))
Fold change is relative to matched tumor-normal pairs or median normal expression.
getLog2FC = function(counts, metadata) {
counts = counts - min(counts) + 1
paired_tp_samples = which(metadata$sample_type == "TP" & metadata$paired)
log2fc = data.frame(numeric(nrow(counts)))
median_nt_expression = apply(counts[, paired_tp_samples+1], 1, median)
for (i in 1:ncol(counts)) {
sample_metadata = metadata[i, ]
if (sample_metadata$sample_type == "NT") next
tp_counts = counts[, i]
if (sample_metadata$paired) nt_counts = counts[, i+1]
else nt_counts = median_nt_expression
log2fc = cbind(log2fc, log2(tp_counts) - log2(nt_counts))
}
colnames(log2fc) = metadata[metadata$sample_type == "TP", ]$barcode
log2fc[, -1]
}
log2fc = getLog2FC(counts, luad_metadata)
metadata = heatmap_metadata[heatmap_metadata$barcode %in% colnames(log2fc), ] pheatmap(log2fc, annotation_col = annot_col, annotation_row = annot_row, annotation_colors = annot_colors, fontsize = 8, scale = "row", show_colnames = FALSE, show_rownames = FALSE, trace = "none", color = brewer.pal(10, "RdBu"))
# filter out genes by correlation genelist = excludeExtraneousGenes(log2fc) df = log2fc[rownames(log2fc) %in% genelist, ] kable(data.frame(Original = table(srnas[match(rownames(log2fc), srnas$tx_name), ]$class), Noncorrelated = table(srnas[match(genelist, srnas$tx_name), ]$class)))
| Original.Var1 | Original.Freq | Noncorrelated.Var1 | Noncorrelated.Freq |
|---|---|---|---|
| genomic-tRF-3 | 762 | genomic-tRF-3 | 94 |
| genomic-tRF-5 | 790 | genomic-tRF-5 | 132 |
| miRNA | 1529 | miRNA | 79 |
| snoRNA | 397 | snoRNA | 25 |
| tRF-1 | 215 | tRF-1 | 6 |
| tRF-3 | 340 | tRF-3 | 49 |
| tRF-5 | 360 | tRF-5 | 21 |
| tRF-i | 395 | tRF-i | 32 |
# filter metadata metadata = heatmap_metadata[heatmap_metadata$barcode %in% colnames(df), ] pheatmap(df, annotation_col = annot_col, annotation_row = annot_row, annotation_colors = annot_colors, fontsize = 8, scale = "row", show_colnames = FALSE, show_rownames = FALSE, trace = "none", color = brewer.pal(10, "RdBu"))
# filter out genes by fold change variance df = filterVariance(df, 0.5) kable(data.frame(Noncorrelated = table(srnas[match(genelist, srnas$tx_name), ]$class), HiVariance = table(srnas[match(rownames(df), srnas$tx_name), ]$class)))
| Noncorrelated.Var1 | Noncorrelated.Freq | HiVariance.Var1 | HiVariance.Freq |
|---|---|---|---|
| genomic-tRF-3 | 94 | genomic-tRF-3 | 39 |
| genomic-tRF-5 | 132 | genomic-tRF-5 | 68 |
| miRNA | 79 | miRNA | 51 |
| snoRNA | 25 | snoRNA | 19 |
| tRF-1 | 6 | tRF-1 | 1 |
| tRF-3 | 49 | tRF-3 | 24 |
| tRF-5 | 21 | tRF-5 | 12 |
| tRF-i | 32 | tRF-i | 5 |
# filter metadata metadata = heatmap_metadata[heatmap_metadata$barcode %in% colnames(df), ] pheatmap(df, annotation_col = annot_col, annotation_row = annot_row, annotation_colors = annot_colors, fontsize = 8, scale = "row", show_colnames = FALSE, show_rownames = FALSE, trace = "none", color = brewer.pal(10, "RdBu"))
df = log2fc[srnas[match(rownames(log2fc), srnas$tx_name), ]$class == "miRNA", ] # heatmap annotation annot_colors2 = annot_colors annot_colors2$sRNA = NA pheatmap(df, annotation_col = annot_col, annotation_colors = annot_colors2, fontsize = 8, scale = "row", show_colnames = FALSE, show_rownames = FALSE, trace = "none", color = brewer.pal(10, "RdBu"))
# filter for gene variance and extraneous genes df = df[rownames(df) %in% excludeExtraneousGenes(df), ] df = filterVariance(df) pheatmap(df, annotation_col = annot_col, annotation_colors = annot_colors2, fontsize = 8, scale = "row", show_colnames = FALSE, show_rownames = TRUE, trace = "none", color = brewer.pal(10, "RdBu"))
Li et al. has a list of "significant" miRNAs, let's see if their prediction holds up in terms of log(fold change):
df = log2fc[c("hsa-mir-196b", "hsa-mir-205", "hsa-mir-519a-1", "hsa-mir-31", "hsa-mir-7-1", "hsa-mir-193b", "hsa-mir-766", "hsa-mir-187", "hsa-let-7c", "hsa-mir-331", "hsa-mir-133a-1", "hsa-mir-375", "hsa-mir-323b", "hsa-mir-99a", "hsa-mir-101-1", "hsa-mir-374b"), ]
pheatmap(df, annotation_col = annot_col, annotation_colors = annot_colors2, fontsize = 8, scale = "row", show_colnames = FALSE, show_rownames = TRUE, cluster_rows = FALSE, trace = "none", color = brewer.pal(10, "RdBu"))
df = log2fc[srnas[match(rownames(log2fc), srnas$tx_name), ]$class %in% c("tRF-1", "tRF-3", "tRF-5", "genomic-tRF-3", "genomic-tRF-5", "tRF-i"), ]
# heatmap annotation
annot_row2 = data.frame(tRF = as.factor(srnas[match(rownames(df), srnas$tx_name), ]$class))
rownames(annot_row2) = rownames(df)
annot_colors2 = annot_colors
annot_colors2$tRF = setNames(brewer.pal(6, "Paired"), levels(annot_row2$tRF))
pheatmap(df, annotation_col = annot_col, annotation_row = annot_row2, annotation_colors = annot_colors2, fontsize = 8, scale = "row", show_colnames = FALSE, show_rownames = FALSE, trace = "none", color = brewer.pal(10, "RdBu"))
df = log2fc[srnas[match(rownames(log2fc), srnas$tx_name), ]$class %in% c("tRF-1", "tRF-3", "tRF-5", "tRF-i"), ]
# heatmap annotation
annot_row2 = data.frame(tRF = as.factor(srnas[match(rownames(df), srnas$tx_name), ]$class))
rownames(annot_row2) = rownames(df)
annot_colors2 = annot_colors
annot_colors2$tRF = setNames(brewer.pal(4, "Set1"), levels(annot_row2$tRF))
pheatmap(df, annotation_col = annot_col, annotation_row = annot_row2, annotation_colors = annot_colors2, fontsize = 8, scale = "row", show_colnames = FALSE, show_rownames = TRUE, trace = "none", color = brewer.pal(10, "RdBu"))
# filter for gene variance and extraneous genes df = df[rownames(df) %in% excludeExtraneousGenes(df), ] df = filterVariance(df) # heatmap annotation annot_row2 = data.frame(tRF = as.factor(srnas[match(rownames(df), srnas$tx_name), ]$class)) rownames(annot_row2) = rownames(df) pheatmap(df, annotation_col = annot_col, annotation_row = annot_row2, annotation_colors = annot_colors2, fontsize = 8, scale = "row", show_colnames = FALSE, show_rownames = TRUE, trace = "none", color = brewer.pal(10, "RdBu"))
df2 = df[, metadata[order(metadata$subtype), ]$barcode]
df2 = df2[, metadata[order(metadata$subtype), ]$subtype %in% c("PP", "PI", "TRU")]
annot_col2 = annot_col[colnames(df2), ]
pheatmap(df2, annotation_col = annot_col2, annotation_row = annot_row2, annotation_colors = annot_colors2, cluster_cols = FALSE, fontsize = 8, scale = "row", show_colnames = FALSE, show_rownames = TRUE, trace = "none", color = brewer.pal(10, "RdBu"))
df2 = df[, metadata[order(metadata$stage), ]$barcode]
df2 = df2[, metadata[order(metadata$stage), ]$stage %in% c("I", "II", "III", "IV")]
annot_col2 = annot_col[colnames(df2), ]
pheatmap(df2, annotation_col = annot_col2, annotation_row = annot_row2, annotation_colors = annot_colors2, cluster_cols = FALSE, fontsize = 8, scale = "row", show_colnames = FALSE, show_rownames = TRUE, trace = "none", color = brewer.pal(10, "RdBu"))
## Saving search path.. ## Saving list of loaded packages.. ## Saving all data... ## Done.